ct <- factor(Phenotype_met_design, 
       levels = c("IDHm.DNMT3Am.WT1m.FLT3m.TET2WT",
                  "IDHm.DNMT3Am.WT1WT.FLT3AWT.TET2WT",
                  "IDHm.DNMT3Am.WT1WT.FLT3m.TET2WT",
                  "IDHm.DNMT3AWT.WT1m.FLT3AWT.TET2WT",
                  "IDHm.DNMT3AWT.WT1WT.FLT3AWT.TET2WT",
                  "IDHWT.DNMT3Am.WT1WT.FLT3AWT.TET2WT",
                  "IDHWT.DNMT3Am.WT1WT.FLT3m.TET2WT",
                  "IDHWT.DNMT3AWT.WT1m.FLT3AWT.TET2WT",
                  "IDHWT.DNMT3AWT.WT1WT.FLT3AWT.TET2m", 
                  "IDHWT.DNMT3AWT.WT1WT.FLT3AWT.TET2WT",
                  "IDHWT.DNMT3AWT.WT1WT.FLT3m.TET2WT"),
       labels = c("IDHm.DNMT3Am.WT1m.FLT3m", 
                  "IDHm.DNMT3Am",
                  "IDHm.DNMT3Am.FLT3m",
                  "IDHm.WT1m",
                  "IDHm",
                  "DNMT3Am",
                  "DNMT3Am.FLT3m",
                  "WT1m",
                  "TET2m", 
                  "WT",
                  "FLT3m"))
design <- model.matrix(~ 0 + ct)
colnames(design) <- levels(ct)
fit <- lmFit(BMIQ_met, design)
contrasts <- makeContrasts(`IDHm-WT` = IDHm - WT,
                           `DNMT3Am-WT` = DNMT3Am - WT,
                           `WT1m-WT` = WT1m - WT,
                           `FLT3m-WT` = FLT3m - WT,
                           `TET2m-WT` = TET2m - WT,
                           levels = design)

move <- function(data, cols, ref, side = c("before", "after")) {
  if (!requireNamespace("dplyr")) {
    stop("Make sure package 'dplyr' is installed to use function 'move'")
  }
  side <- match.arg(side)
  cols <- rlang::enquo(cols)
  ref <- rlang::enquo(ref)
  if (side == "before") {
    dplyr::select(data, 1:!!ref, -!!ref, -!!cols, !!cols, dplyr::everything())
  } else {
    dplyr::select(data, 1:!!ref, -!!cols, !!cols, dplyr::everything())
  }
}

fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend = TRUE)
FitList <- list()
for (i in 1:ncol(contrasts)) {
  FitList[[colnames(contrasts)[i]]] <- topTable(fit2, coef = i, number = nrow(BMIQ_met)) %>%
    mutate(ID = rownames(.)) %>%
    move(., ID, logFC, side = "before") #%>%
    # filter(adj.P.Val < 0.05)
}
comparisons <- names(FitList)
Create_heatmap_corr <- function(DATA, remove_cases = 0, Phenotype, corr_method = "pearson") {
  name <- ""
  if (class(remove_cases) != "logical" ){
    number_of_cases <- length(DATA[1,])
    remove_cases <- rep(TRUE, number_of_cases)
    name = "full_data"
  }
  DATA_filtered <- DATA[,remove_cases]
  Phenotype_filtered <- Phenotype[remove_cases,]
  matrix <- as.matrix.data.frame(DATA_filtered)
  ann_colors <- list(
    DNMT3a = c(DNMT3AWT = "brown1", DNMT3Am = "cyan4"),
    IDH = c(IDHWT = "bisque1", IDHm = "coral3"),
    WT1 = c(WT1WT = "chartreuse4", WT1m = "brown4"),
    FLT3 = c(FLT3AWT = "brown", FLT3m = "darkgoldenrod1"),
    TET2 = c(TET2WT = "brown1", TET2m = "cyan4"))
  annotation_for_heatmap <- data.frame(IDH = Phenotype_filtered$IDH, DNMT3a = Phenotype_filtered$DNMT3A, WT1 = Phenotype_filtered$WT1, FLT3 = Phenotype_filtered$FLT3, TET2 = Phenotype_filtered$TET2)
  rownames(annotation_for_heatmap) <- colnames(DATA_filtered)
  corr <- rcorr(matrix, type = corr_method)$r
  name <- ifelse(name == "full_data", name, deparse(substitute(remove_cases)))
  colnames(corr) <- colnames(DATA_filtered)
  heatmap <- pheatmap(corr, 
         color = colorRampPalette(rev(brewer.pal(n = 11, name = "RdYlBu")))(100),
         annotation_col = annotation_for_heatmap,
         annotation_colors = ann_colors,
         legend = TRUE,
         treeheight_row = 20,
         main = paste0("Heatmap ",  deparse(substitute(DATA)), " ", name, " ", corr_method, " correlation"), 
         fontsize = 10
         )
  return(heatmap)
} 

heat_BMIQ_all_data <- Create_heatmap_corr(BMIQ_met, Phenotype = Phenotype_met)

single_mutation <- rowSums(design[,c(1:4,7)]) == 0
heat_single_mutation <- Create_heatmap_corr(BMIQ_met, single_mutation, Phenotype_met)

png(filename = "Results/heatmap_BMIQ_allData.png", width = 1920, height = 1080)
heat_BMIQ_all_data
dev.off()
## png 
##   2
png(filename = "Results/heatmap_BMIQ_single_mutation.png", width = 1920, height = 1080)
heat_single_mutation
dev.off()
## png 
##   2
no_wild_type <- design[,10] == 0
heat_no_WT <- Create_heatmap_corr(BMIQ_met, no_wild_type, Phenotype_met)

png(filename = "Results/heatmap_BMIQ_no_wild_type.png", width = 1920, height = 1080)
heat_no_WT
dev.off()
## png 
##   2
no_wild_type_single_mutation <- rowSums(design[,c(1:4,7, 10)]) == 0
heat_no_WT_single_mutation <- Create_heatmap_corr(BMIQ_met, no_wild_type_single_mutation, Phenotype_met)

png(filename = "Results/heatmap_BMIQ_no_wild_type_single_mutation.png", width = 1920, height = 1080)
heat_no_WT_single_mutation
dev.off()
## png 
##   2
deconvolution <- read.delim("deconvolution.txt", row.names=1)
Cases <- ALL_GDC_sample$`Case ID` %>% str_replace_all(., "-", ".")
deconvolution <- deconvolution[rownames(deconvolution) %in% Cases,]
MacroPatient <- rownames(deconvolution[deconvolution$uncharacterized.cell > 0.5,])
No_Macro_deconv <- colnames(BMIQ_met) %in% MacroPatient
names(No_Macro_deconv) <- seq(1:length(No_Macro_deconv))
heat_no_M2_patients <- Create_heatmap_corr(DATA = BMIQ_met, remove_cases = No_Macro_deconv, Phenotype = Phenotype_met, corr_method = "pearson")

png(filename = "Results/heat_no_M2_patients.png", width = 1920, height = 1080)
heat_no_M2_patients
dev.off()
## png 
##   2
DATA_for_PCA <- cbind(t(BMIQ_met), Phenotype_met)
res.pca <- PCA(DATA_for_PCA, scale.unit = FALSE, ncp = 5, graph = TRUE, quali.sup = c(394047:394051))

png(filename = "Results/Methylation_ellipses_PCA.png", width = 1920, height = 1080)
plotellipses(res.pca, 394047:394051)
dev.off()
## png 
##   2
png(filename = "Results/Methylation_barplot_PCA.png", width = 1920, height = 1080)
fviz_eig(res.pca, addlabels = TRUE)
dev.off()
## png 
##   2
fviz_eig(res.pca, addlabels = TRUE)

plotellipses(res.pca, 394047:394051)

Filter_Differential_Methylation <- function(DATA, pvalue, logFC_treshold) {
  message("[=========================================================]")
  message("[<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]")
  message("[<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]")
  message("-----------------------------------------------------------")
  High_contrast <- DATA[DATA$P.Value < pvalue & DATA$logFC**2 > logFC_treshold**2,]
  message("The number of CpGs differentially methylated is ")
  print(length(High_contrast[,1]))
  return(High_contrast)
}

if (!exists("anno450")){
  anno450 <- read.csv("HumanMethylation450_15017482_v1-2.csv", skip = 7) %>% dplyr::select(., "Name", "CHR", "MAPINFO", "UCSC_RefGene_Name", "UCSC_RefGene_Group", "Relation_to_UCSC_CpG_Island")
}

Filtered_methylation_data <- list()
for (i in names(FitList)){
  message(paste0("\n=== Comparison ", i, " analyze ==="))
  Filtered_methylation_data[[i]] <- Filter_Differential_Methylation(FitList[[i]], pvalue = 0.05, logFC_treshold = 0.3) %>% merge(., anno450, by.x = "ID", by.y = "Name") %>% dplyr::filter(., str_detect(.$ID, "cg"))
}
## 
## === Comparison IDHm-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 9928
## 
## === Comparison DNMT3Am-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 1187
## 
## === Comparison WT1m-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 314
## 
## === Comparison FLT3m-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 1463
## 
## === Comparison TET2m-WT analyze ===
## [=========================================================]
## [<<<<<<<<<<<<<<<<< Filtering Methylation >>>>>>>>>>>>>>>>>]
## [<<<<<<<<<< Keep cpgs Differentially Methylated >>>>>>>>>>]
## -----------------------------------------------------------
## The number of CpGs differentially methylated is
## [1] 2746
enhanced_volcano <- list()
for (i in names(FitList)) {
  enhanced_volcano[[i]] <- data.frame(logFC = FitList[[i]]$logFC, pvalue = FitList[[i]]$P.Value, cpgs = FitList[[i]]$ID)
  enhanced_volcano[[i]] <- merge(enhanced_volcano[[i]], anno450, by.x = "cpgs", by.y = "Name")
  enhanced_volcano[[i]] <- EnhancedVolcano(
    toptable = enhanced_volcano[[i]],
    lab = enhanced_volcano[[i]]$UCSC_RefGene_Name,
    x = "logFC",
    y = "pvalue",
    FCcutoff = 0.1,
    pCutoff = 0.0001,
    title = "Volcano plot of the differential\nmethylation of CpGs between WT and Mut",
    subtitle = NA,
    legendPosition = "right",
    subtitleLabSize = 0,
    legendLabSize = 10,
    ylim = c(0, 10)
  )
}
comparisons <- names(enhanced_volcano)
j <- 4
png(file = paste0("Results/", comparisons[j], "_Methylation_beta_values_BMIQ_volcano_plot.png"), width = 1920, height = 1080)
par(mfrow = c(1, 1))
enhanced_volcano[[comparisons[j]]]
dev.off()
## png 
##   2
if (!exists("Blueprint_network")) {
  Blueprint_network <- read.csv("BLUEPRINT_fragments_good.tsv", sep = "\t")
  Blueprint_network <- dplyr::select(Blueprint_network, "chr", "start", "end", "type", "ensembl", "gene_names", "intronic_regions", "type") #%>% filter(., type == "P")
  Blueprint_network <- Blueprint_network %>% separate_rows(., gene_names, sep = " ")
  Blueprint_GRanges <- GRanges(
    seqnames = Blueprint_network$chr,
    ranges = IRanges(start = Blueprint_network$start, end = Blueprint_network$end),
    type = Blueprint_network$type,
    gene_names = Blueprint_network$gene_names
  )
}


GRanges_methylations <- list()
match_hit_methylation_promoters <- list()
Promoters_list <- list()
for (i in names(Filtered_methylation_data)) {
  GRanges_methylations[[i]] <- GRanges(
    seqnames = Filtered_methylation_data[[i]]$CHR,
    ranges = IRanges(start = Filtered_methylation_data[[i]]$MAPINFO, end = Filtered_methylation_data[[i]]$MAPINFO + 1),
    logFC = Filtered_methylation_data[[i]]$logFC
  )
  overlaps <- findOverlaps(Blueprint_GRanges, GRanges_methylations[[i]])
  match_hit_methylation_promoters[[i]] <- data.frame(mcols(Blueprint_GRanges[queryHits(overlaps)]), as.data.frame(mcols(GRanges_methylations[[i]])[subjectHits(overlaps), ]), stringsAsFactors = T)
  colnames(match_hit_methylation_promoters[[i]]) <- c("type", "gene_names", "logFC")
  Promoters_list[[i]] <- match_hit_methylation_promoters[[i]][match_hit_methylation_promoters[[i]]$type == "P",]
  message(paste0("\n=== Comparison ", i, " analyze ==="))
  message("=== CpGs Differentially methylated overlapping Genes promoters ===")
  print(length(Promoters_list[[i]]$gene_names))
}
## 
## === Comparison IDHm-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 7479
## 
## === Comparison DNMT3Am-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 960
## 
## === Comparison WT1m-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 295
## 
## === Comparison FLT3m-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 854
## 
## === Comparison TET2m-WT analyze ===
## === CpGs Differentially methylated overlapping Genes promoters ===
## [1] 1813
if (!exists("pchic")){
  load("pchic.RData")
  pchic <- pchic[, c(1:10)]
  List_Promoter <- paste(pchic$baitChr, pchic$baitStart, sep = "_")
  List_Promoter <- unique(List_Promoter)
  colnames(pchic)[c(1:5, 6:10)] <- rep(c("chr", "start", "end", "ID", "Name"), 2)
  PCHiC_bed <- unique(rbind(pchic[, c(1:3, 5)], pchic[, c(6:8, 10)]))
  PCHiC_GRange <- GRanges(
    seqnames = PCHiC_bed$chr,
    IRanges(start = PCHiC_bed$start, end = PCHiC_bed$end),
    Gene_Pchic = PCHiC_bed$Name
  )
  PCHiC_GRange$ID <- paste(PCHiC_bed$chr, PCHiC_bed$start, sep = "_")
  colnames(pchic) <- c("chr_bait", "start_bait", "end_bait", "ID_bait", "Name_bait", "chr_oe", "start_oe", "end_oe", "ID_oe", "Name_oe")
  pchic$IDbait <- paste(pchic$chr_bait, pchic$start_bait, sep = "_")
  pchic$IDoe <- paste(pchic$chr_oe, pchic$start_oe, sep = "_")
}

matchit_methylome_pchic <- list()
for (i in names(Filtered_methylation_data)){
  overlaps <- findOverlaps(PCHiC_GRange, GRanges_methylations[[i]])
  matchit_methylome_pchic[[i]] <- data.frame(mcols(PCHiC_GRange[queryHits(overlaps)]), as.data.frame(mcols(GRanges_methylations[[i]])[subjectHits(overlaps), ]), stringsAsFactors = T)
  colnames(matchit_methylome_pchic[[i]]) <- c("Gene_pchic", "ID", "logFC")
  matchit_methylome_pchic[[i]]$CpG <- TRUE
  message(paste0("\n=== Comparison ", i, " analyze ==="))
  message("=== CpGs overlapping PCHiC chromatin fragments ===")
  print(length(unique(matchit_methylome_pchic[[i]]$ID)))
}
## 
## === Comparison IDHm-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 5011
## 
## === Comparison DNMT3Am-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 663
## 
## === Comparison WT1m-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 182
## 
## === Comparison FLT3m-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 751
## 
## === Comparison TET2m-WT analyze ===
## === CpGs overlapping PCHiC chromatin fragments ===
## [1] 1379
CpGs_interaction <- list()
for (i in names(matchit_methylome_pchic)){
  CpGs_interaction[[i]] <-  unique(rbind(pchic[which(pchic$IDbait %in% matchit_methylome_pchic[[i]]$ID), ], pchic[which(pchic$IDoe %in% matchit_methylome_pchic[[i]]$ID), ]))
  message(paste0("\n=== Comparison ", i, " analyze ==="))
  message("=== Chromatin fragment connected to CpGs Differentially methylated ===")
  ID_chromatin <- unique(c(CpGs_interaction[[i]]$IDbait, CpGs_interaction[[i]]$IDoe))
  print(length(ID_chromatin))
}
## 
## === Comparison IDHm-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 66011
## 
## === Comparison DNMT3Am-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 11491
## 
## === Comparison WT1m-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 4436
## 
## === Comparison FLT3m-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 12893
## 
## === Comparison TET2m-WT analyze ===
## === Chromatin fragment connected to CpGs Differentially methylated ===
## [1] 22391
CpGs_interaction_nodes <- list()
match_hit_CpGs_neighbor_promoter <- list()
for (i in names(CpGs_interaction)) {
  message(paste0("\n=== Comparison ", i, " analyze ==="))
  CpGs_interaction_nodes[[i]] <- CpGs_interaction[[i]][,c(1:3,11,5:8,12,10)]
  colnames(CpGs_interaction_nodes[[i]]) <- rep(c("chr", "start", "end", "ID", "Name"),2)
  CpGs_interaction_nodes[[i]] <- unique(rbind(CpGs_interaction_nodes[[i]][,c(1:5)], CpGs_interaction_nodes[[i]][,c(6:10)])) %>% merge(., matchit_methylome_pchic[[i]], by.x = "ID", by.y = "ID")
  CpGs_interaction_nodes_Granges <- GRanges(
    seqnames = CpGs_interaction_nodes[[i]]$chr,
    ranges = IRanges(start = CpGs_interaction_nodes[[i]]$start, end = CpGs_interaction_nodes[[i]]$end),
    ID = CpGs_interaction_nodes[[i]]$ID,
    logFC = CpGs_interaction_nodes[[i]]$logFC
  )
  overlaps <- findOverlaps(Blueprint_GRanges, CpGs_interaction_nodes_Granges)
  match_hit_CpGs_neighbor_promoter[[i]] <- data.frame(mcols(Blueprint_GRanges[queryHits(overlaps)]), as.data.frame(mcols(CpGs_interaction_nodes_Granges)[subjectHits(overlaps), ]), stringsAsFactors = T)
  colnames(match_hit_CpGs_neighbor_promoter[[i]]) <- c("type", "gene_names", "ID", "logFC")
  
  message("=== Number of nodes ===")
  print(length(match_hit_CpGs_neighbor_promoter[[i]]$type))
  
  message("\n=== Number of nodes corresponding to Promoter ===")
  nodes_promoter <- match_hit_CpGs_neighbor_promoter[[i]][match_hit_CpGs_neighbor_promoter[[i]]$type == "P",]
  print(length(nodes_promoter$gene_names))
  
  message("\n=== Number of genes having Promoter in the neighbor of CpGs DM ===")
  print(length(unique(nodes_promoter$gene_names)))
}
## 
## === Comparison IDHm-WT analyze ===
## === Number of nodes ===
## [1] 11640
## 
## === Number of nodes corresponding to Promoter ===
## [1] 7482
## 
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 4045
## 
## === Comparison DNMT3Am-WT analyze ===
## === Number of nodes ===
## [1] 1454
## 
## === Number of nodes corresponding to Promoter ===
## [1] 960
## 
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 610
## 
## === Comparison WT1m-WT analyze ===
## === Number of nodes ===
## [1] 372
## 
## === Number of nodes corresponding to Promoter ===
## [1] 295
## 
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 210
## 
## === Comparison FLT3m-WT analyze ===
## === Number of nodes ===
## [1] 1437
## 
## === Number of nodes corresponding to Promoter ===
## [1] 854
## 
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 572
## 
## === Comparison TET2m-WT analyze ===
## === Number of nodes ===
## [1] 2638
## 
## === Number of nodes corresponding to Promoter ===
## [1] 1819
## 
## === Number of genes having Promoter in the neighbor of CpGs DM ===
## [1] 1091
design <- model.matrix(~0 + Phenotype_RNAseq_design)
  
  #Removing heteroscedascity from data
  
v_expr <- voom(DATA_RNAseq[,-77], design, plot = TRUE)

matrix_rna <- v_expr$E
single_mutation <- design[,c(1:4,7)] == 0


heat_RNAseq_all_data <- Create_heatmap_corr(matrix_rna, Phenotype = Phenotype_RNAseq)

single_mutation <- rowSums(design[,c(1:4,7)]) == 0
heat_single_mutation <- Create_heatmap_corr(matrix_rna, single_mutation, Phenotype_RNAseq)

png(filename = "Results/heatmap_RNAseq_allData.png", width = 1920, height = 1080)
heat_RNAseq_all_data
dev.off()
## png 
##   2
png(filename = "Results/heatmap_RNAseq_single_mutation.png", width = 1920, height = 1080)
heat_single_mutation
dev.off()
## png 
##   2
no_wild_type <- design[,10] == 0
heat_no_WT <- Create_heatmap_corr(matrix_rna, no_wild_type, Phenotype_RNAseq)

png(filename = "Results/heatmap_RNAseq_no_wild_type.png", width = 1920, height = 1080)
heat_no_WT
dev.off()
## png 
##   2
no_wild_type_single_mutation <- rowSums(design[,c(1:4,7, 10)]) == 0
heat_no_WT_single_mutation <- Create_heatmap_corr(matrix_rna, no_wild_type_single_mutation, Phenotype_RNAseq)

png(filename = "Results/heatmap_RNAseq_no_wild_type_single_mutation.png", width = 1920, height = 1080)
heat_no_WT_single_mutation
dev.off()
## png 
##   2
MacroPatient <- rownames(deconvolution[deconvolution$uncharacterized.cell > 0.5,])
No_Macro_deconv <- colnames(matrix_rna) %in% MacroPatient
names(No_Macro_deconv) <- seq(1:length(No_Macro_deconv))
heat_no_M2_patients <- Create_heatmap_corr(DATA = matrix_rna, remove_cases = No_Macro_deconv, Phenotype = Phenotype_RNAseq)

png(filename = "Results/heatmap_RNAseq_no_M2_patients.png", width = 1920, height = 1080)
heat_no_M2_patients
dev.off()
## png 
##   2
DATA_RNA_for_PCA <- cbind(t(matrix_rna), Phenotype_RNAseq)
res.pca.rna <- PCA(DATA_RNA_for_PCA, scale.unit = FALSE, ncp = 2, graph = TRUE, quali.sup = c(56494:56498))

png(filename = "Results/RNA_ellipses_PCA.png", width = 1920, height = 1080)
plotellipses(res.pca.rna, 56494:56498)
dev.off()
## png 
##   2
png(filename = "Results/RNA_barplot_PCA.png", width = 1920, height = 1080)
fviz_eig(res.pca.rna, addlabels = TRUE)
dev.off()
## png 
##   2
fviz_eig(res.pca.rna, addlabels = TRUE)

plotellipses(res.pca.rna, 56494:56498)

DGEs <- function(data, Phenotype){
  require(limma)
  require(dplyr)
  #require(dendextend)
  
  
  message("[===========================]")
  message("[<<<<<<< DGEs START >>>>>>>>>]")
  message("[<<<< Pairwise analysis >>>>>]")
  message("-----------------------------")
  
  # This function creates the pairs for the pairwise matrices
  design.pairs <- function(levels) {
    n <- length(levels)
    design <- matrix(0,n,choose(n,2))
    rownames(design) <- levels
    colnames(design) <- 1:choose(n,2)
    k <- 0
    for (i in 1:(n - 1))
      for (j in (i + 1):n) {
        k <- k + 1
        design[i,k] <- 1
        design[j,k] <- -1
        colnames(design)[k] <- paste(levels[i], "-", levels[j],sep = "")
      }
    design
  }
  
  # This function creates the pairs for the pairwise matrices
  
  design <- model.matrix(~0 + Phenotype)
  
  #Removing heteroscedascity from data
  
  v_expr <- voom(data, design, plot = FALSE)
  contr.matrix <- design.pairs(levels(factor(Phenotype)))
  colnames(design) <- rownames(contr.matrix)   
  
  # Fitting linear models for comparisons of interest
  Fit <- lmFit(v_expr, design) %>%
    contrasts.fit(., contr.matrix) %>%
    eBayes(., trend = TRUE)
  
  FitList <- list()
  for (i in 1:ncol(contr.matrix)) {
    FitList[[i]] <- topTable(Fit, coef = i, adjust.method = "BH", number = nrow(v_expr$E)) %>%
      mutate(ID = rownames(.))
    
    message(paste0(i, " done"))
    
  }
  names(FitList) <- colnames(contr.matrix)
  return(FitList)
}

DGE <- DGEs(data = DATA_RNAseq[,1:(length(DATA_RNAseq[1,])-1)], Phenotype_RNAseq_design)
## [===========================]
## [<<<<<<< DGEs START >>>>>>>>>]
## [<<<< Pairwise analysis >>>>>]
## -----------------------------
## 1 done
## 2 done
## 3 done
## 4 done
## 5 done
## 6 done
## 7 done
## 8 done
## 9 done
## 10 done
## 11 done
## 12 done
## 13 done
## 14 done
## 15 done
## 16 done
## 17 done
## 18 done
## 19 done
## 20 done
## 21 done
## 22 done
## 23 done
## 24 done
## 25 done
## 26 done
## 27 done
## 28 done
## 29 done
## 30 done
## 31 done
## 32 done
## 33 done
## 34 done
## 35 done
## 36 done
## 37 done
## 38 done
## 39 done
## 40 done
## 41 done
## 42 done
## 43 done
## 44 done
## 45 done
## 46 done
## 47 done
## 48 done
## 49 done
## 50 done
## 51 done
## 52 done
## 53 done
## 54 done
## 55 done
name_comparison <- function(name) {
  name_splited <- strsplit(name, split = "[-]")[[1]]
  name_left <- name_splited[1]
  name_right <- name_splited[2]
  name_left <- strsplit(name_left, split = "[.]")[[1]]
  name_right <- strsplit(name_right, split = "[.]")[[1]]
  name_cleaned_left <- name_left[which(str_detect(name_left, "m"))]
  name_cleaned_right <- name_right[which(str_detect(name_right, "m"))]
  ifelse(isEmpty(name_cleaned_right), name_cleaned_right <- "WT", TRUE)
  ifelse(isEmpty(name_cleaned_left), name_cleaned_left <- "WT", TRUE)
  name_cleaned_left <- paste(as.character(name_cleaned_left), collapse = ".")
  name_cleaned_right <- paste(as.character(name_cleaned_right), collapse = ".")
  name_cleaned <- paste(name_cleaned_left, name_cleaned_right, collapse = "-", sep = "-")
  return(name_cleaned)
}

for (i in seq(length(DGE))) {
  names(DGE)[[i]] <- name_comparison(names(DGE)[[i]])
}
DGE[["WT-FLT3m"]]$logFC <- -DGE[["WT-FLT3m"]]$logFC
names(DGE)[55] <- "FLT3m-WT"

DGE_comparison_of_interest <- list()
comparison_of_interest <- c("IDHm-WT", "WT1m-WT", "DNMT3Am-WT", "FLT3m-WT", "TET2m-WT")
for (i in comparison_of_interest) {
  DGE_comparison_of_interest[[i]] <- DGE[[i]]
}
if (!exists("ensembl")){
  ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
}

add_genes_coordinates <- function(vector_of_genes) {
  
  # Download homo sapiens genes ensembl database
  
  # 
  coordinates <- getBM(attributes = c('ensembl_gene_id', 'hgnc_symbol', 'entrezgene_id'), filters = 'ensembl_gene_id', values = vector_of_genes$ID, mart = ensembl)
  genes_annoted <- merge(x = vector_of_genes, y = coordinates, by.x = "ID", by.y = "ensembl_gene_id", all.x = TRUE)
  return(genes_annoted)
}

Genes_DE <- list()
List_genes_DE <- list()
List_genes_DE[["up"]] <- list()
for (i in names(DGE_comparison_of_interest)) {
  message(paste0("\n=== Comparison ", i, " analyze ==="))
  DGE_comparison_of_interest[[i]] <- add_genes_coordinates(DGE_comparison_of_interest[[i]])
  Genes_DE[[i]] <- DGE_comparison_of_interest[[i]] %>% filter(., (logFC**2 > 2.25 & P.Value < 0.05 & hgnc_symbol != "")) %>% dplyr::select(., logFC, AveExpr, P.Value, hgnc_symbol)
  List_genes_DE[["up"]][[i]] <- as.vector(Genes_DE[[i]] %>% filter(., logFC > 0) %>% dplyr::select(., hgnc_symbol))$hgnc_symbol
  List_genes_DE[["down"]][[i]] <- as.vector(Genes_DE[[i]] %>% filter(., logFC < 0) %>% dplyr::select(., hgnc_symbol))$hgnc_symbol
  write.csv(x = List_genes_DE[["up"]][[i]], file = paste0("Results/", i, "_up_regulated.csv"), row.names = FALSE)
  write.csv(x = List_genes_DE[["down"]][[i]], file = paste0("Results/", i, "_down_regulated.csv"), row.names = FALSE)
  write.csv(x = Genes_DE[[i]], file = paste0("Results/", i, "_DE.csv"), row.names = FALSE)
}
## 
## === Comparison IDHm-WT analyze ===
## 
## === Comparison WT1m-WT analyze ===
## 
## === Comparison DNMT3Am-WT analyze ===
## 
## === Comparison FLT3m-WT analyze ===
## 
## === Comparison TET2m-WT analyze ===
venn.diagram(List_genes_DE[["up"]], filename = "Results/Genes_up_regulated_compared_to_WT.png", width = 1920, height = 1080, imagetype = "png")
## [1] 1
venn.diagram(List_genes_DE[["down"]], filename = "Results/Genes_down_regulated_compared_to_WT.png", width = 1920, height = 1080, imagetype = "png")
## [1] 1
up_entrez <- list()
down_entrez <- list() 
for (i in names(DGE_comparison_of_interest)) {
  up_entrez[[i]] <- DGE_comparison_of_interest[[i]][DGE_comparison_of_interest[[i]]$logFC > 0 & DGE_comparison_of_interest[[i]]$P.Value < 0.05,]
  down_entrez[[i]] <- DGE_comparison_of_interest[[i]][DGE_comparison_of_interest[[i]]$logFC < 0 & DGE_comparison_of_interest[[i]]$P.Value < 0.05,]
}

ego_up <- list()
ego_down <- list()
ego <- list()
for (i in names(DGE_comparison_of_interest)) {
  message("========================================================")
  message("[-----------------Comparison starting-------------------]")
  message(paste0("[--- Comparison: ", i, " in progress ---------]"))
  ego_up[[i]] <- enrichGO(
    gene = unique(up_entrez[[i]]$hgnc_symbol)[-1],
    keyType = "SYMBOL",
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    pAdjustMethod = "none"
  )
  ego_down[[i]] <- enrichGO(
    gene = unique(down_entrez[[i]]$hgnc_symbol)[-1],
    keyType = "SYMBOL",
    OrgDb = "org.Hs.eg.db",
    ont = "ALL",
    pAdjustMethod = "none"
  )
  # ego[[i]] <- enrichGO(
  #   gene = c(down_entrez[[i]][["Nodes"]]$hgnc_symbol, up_entrez[[i]][["Nodes"]]$hgnc_symbol),
  #   keyType = "SYMBOL",
  #   OrgDb = "org.Hs.eg.db",
  #   ont = "ALL",
  #   pAdjustMethod = "none"
  # )
}
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: IDHm-WT in progress ---------]
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: WT1m-WT in progress ---------]
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: DNMT3Am-WT in progress ---------]
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: FLT3m-WT in progress ---------]
## ========================================================
## [-----------------Comparison starting-------------------]
## [--- Comparison: TET2m-WT in progress ---------]
j <- 5
png(filename = paste0("Results/", comparisons[j], "_up_ego.png"), width = 1920, height = 1080)
dotplot(ego_up[[j]], showCategory = 50 ) + ggtitle(paste0(comparisons[j], " GO terms, Gene Upregulated"))
dev.off()
## png 
##   2
png(filename = paste0("Results/", comparisons[j], "_down_ego.png"), width = 1920, height = 1080)
dotplot(ego_down[[j]], showCategory = 50 ) + ggtitle(paste0(comparisons[j], " GO terms, Gene Downregulated"))
dev.off()
## png 
##   2
DGE_Blueprint <- list()
for (i in names(DGE_comparison_of_interest)) {
  DGE_Blueprint[[i]] <- merge(DGE_comparison_of_interest[[i]], Blueprint_network, by.x = "ID", by.y = "ensembl")
}

find_overlap_between_genes_and_chromatine_network <- function(DATA, bed_file) {
  message("\n[====================================================================================================]")
  message("[<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]")
  message("[<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]")
  message("------------------------------------------------------------------------------------------------------\n")  
  message(paste0("[------------- Comparison: ", i, " in progress ---------]"))
  Genes_Ranges <- GRanges(
    seqnames = DATA$chr,
    ranges = IRanges(start = DATA$start, end = DATA$end),
    genes_from_ensembl = DATA$hgnc_symbol,
    logFC = DATA$logFC,
    pvalue = DATA$P.Value
  )
  overlaps <- findOverlaps(bed_file, Genes_Ranges)
  match_hit <- data.frame(mcols(bed_file[queryHits(overlaps)]), as.data.frame(mcols(Genes_Ranges)[subjectHits(overlaps), ]), stringsAsFactors = T)
  return(match_hit)
}

matchit_RNAseq <- list()
Significatif <- list()
for (i in names(DGE_Blueprint)) {
  matchit_RNAseq[[i]] <- find_overlap_between_genes_and_chromatine_network(DGE_Blueprint[[i]], PCHiC_GRange)
  message("=== Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===")
  Significatif[[i]] <- matchit_RNAseq[[i]][matchit_RNAseq[[i]]$logFC**2 > 1.5**2 & matchit_RNAseq[[i]]$pvalue < 0.05,]
  print(length(unique(Significatif[[i]]$genes_from_ensembl)))
}
## 
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: IDHm-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 407
## 
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: WT1m-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 136
## 
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: DNMT3Am-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 116
## 
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: FLT3m-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 1184
## 
## [====================================================================================================]
## [<< Overlap differentially expressed genes promoter coordinates to chromatine fragment coordinates >>]
## [<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Need Promoter Capture Hi-C >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>]
## ------------------------------------------------------------------------------------------------------
## [------------- Comparison: TET2m-WT in progress ---------]
## === Number of Genes Differentially expressed with |logFC| > 1.5 & Pvalue < 0.05 ===
## [1] 373
for (i in comparisons) {
  message(paste0("\n[------------------------------------------- Comparison: ", i, " in progress -------------------------------------------]"))
  message("=== Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===")
  Promoter_DE_CpG_DM <- Significatif[[i]][Significatif[[i]]$genes_from_ensembl %in% Promoters_list[[i]]$gene_names,]
  print(length(Promoter_DE_CpG_DM$genes_from_ensembl))
  
  message("=== Number of CpGs Up methylated in promoter of Genes down regulated ===")
  CpGs_up <- Promoters_list[[i]] %>% dplyr::filter(., logFC > 0)
  Promoter_DR_CPG_UM <- Significatif[[i]][Significatif[[i]]$genes_from_ensembl %in% CpGs_up$gene_names,] %>% filter(., logFC < 0)
  print(length(unique(Promoter_DR_CPG_UM$genes_from_ensembl)))
  print(unique(Promoter_DR_CPG_UM$genes_from_ensembl))
  
  message("=== Number of CpGs Down methylated in promoter of Genes up regulated ===")
  CpGs_down <- Promoters_list[[i]] %>% dplyr::filter(., logFC < 0)
  Promoter_UR_CPG_DM <- Significatif[[i]][Significatif[[i]]$genes_from_ensembl %in% CpGs_down$gene_names,] %>% filter(., logFC > 0)
  print(length(unique(Promoter_UR_CPG_DM$genes_from_ensembl)))
  print(unique(Promoter_UR_CPG_DM$genes_from_ensembl))
  
  message("\n=== Number of Genes differentially expressed in the neighbor of CpGs ===")
  Promoter_DE_Neighbor <- Significatif[[i]][Significatif[[i]]$genes_from_ensembl %in% match_hit_CpGs_neighbor_promoter[[i]]$gene_names,]
  print(length(Promoter_DE_Neighbor$genes_from_ensembl))
}
## 
## [------------------------------------------- Comparison: IDHm-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 711
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 64
##  [1] "AGTRAP"    "CDA"       "COL9A2"    "HENMT1"    "SEMA4A"    "CD1D"     
##  [7] "MAP10"     "PDLIM1"    "UTF1"      "C1QTNF4"   "MS4A3"     "MPZL2"    
## [13] "SLC37A2"   "ZNF385A"   "GTSF1"     "SERP2"     "CEBPE"     "GALC"     
## [19] "LTK"       "PTX4"      "MEFV"      "IL27"      "CD300LB"   "CD300LD"  
## [25] "LGALS3BP"  "ANGPTL6"   "MAG"       "CLC"       "SULT2B1"   "CLEC11A"  
## [31] "QPCT"      "DAPL1"     "CXCR2"     "CD93"      "C20orf197" "S100B"    
## [37] "CX3CR1"    "SPATA12"   "CD96"      "GP9"       "HLA-DMB"   "SUSD3"    
## [43] "FBP1"      "PHYHD1"    "IL2RG"     "ZNF185"    "DHRS3"     "CACNA2D4" 
## [49] "SLC9A3R2"  "CMTM2"     "SLFN13"    "SIGLEC9"   "VSTM1"     "PTH2R"    
## [55] "LINC00649" "S100P"     "TBC1D9"    "LY86-AS1"  "LY86"      "TNF"      
## [61] "HLA-DPA1"  "ADGB"      "MACC1"     "AK8"
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 3
## [1] "F2RL1"   "PCDHB15" "CLIC2"
## 
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 1150
## 
## [------------------------------------------- Comparison: DNMT3Am-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 129
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 0
## character(0)
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 3
## [1] "MAATS1"  "CPED1"   "DCSTAMP"
## 
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 259
## 
## [------------------------------------------- Comparison: WT1m-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 7
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 2
## [1] "ZSCAN1" "ZNF135"
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 0
## character(0)
## 
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 13
## 
## [------------------------------------------- Comparison: FLT3m-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 359
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 10
##  [1] "CD34"    "ZNF695"  "SLC38A1" "ZNF154"  "ITGA9"   "CD200"   "CACNB4" 
##  [8] "ITGA6"   "PTH2R"   "SLCO5A1"
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 19
##  [1] "ARTN"      "QSOX1"     "FCAMR"     "PROX1"     "MS4A6A"    "MMP19"    
##  [7] "HAL"       "SLC10A2"   "DTNA"      "HNMT"      "CSTA"      "MARVELD2" 
## [13] "KCNN2"     "TREM1"     "TBX20"     "PENK"      "ONECUT2"   "MEIS1"    
## [19] "LINC00899"
## 
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 740
## 
## [------------------------------------------- Comparison: TET2m-WT in progress -------------------------------------------]
## === Number of CpGs differentially methylated in promoter of Genes Differentially expressed ===
## [1] 277
## === Number of CpGs Up methylated in promoter of Genes down regulated ===
## [1] 7
## [1] "F3"    "ANXA2" "TUBG2" "MPP6"  "OPHN1" "PGM1"  "PTH2R"
## === Number of CpGs Down methylated in promoter of Genes up regulated ===
## [1] 9
## [1] "NRXN3"  "ALS2"   "ITIH1"  "BTLA"   "KIT"    "RD3"    "NR2F2"  "RNF212"
## [9] "SYCP2L"
## 
## === Number of Genes differentially expressed in the neighbor of CpGs ===
## [1] 668